# _*_ coding : utf-8 _*_
# @Time : 2023/3/15 0015 12:29
# @Author : 菜鸟王小二
# @File : 10_urllib_ajax的get请求豆瓣电影前5页
# @Project : python爬虫

import urllib.request
import urllib.parse


def getMovieData(page):
    """Build the GET request for one page of Douban's movie search endpoint.

    Args:
        page: 1-based page number. Each page holds 10 entries, so the
            ``page_start`` offset sent in the query is ``(page - 1) * 10``.

    Returns:
        A ``urllib.request.Request`` for the fully encoded URL, carrying a
        browser-like ``User-Agent`` header.

    The final URL is also printed to stdout.
    """
    # urlencode percent-encodes the non-ASCII tag value as UTF-8.
    query = urllib.parse.urlencode({
        'type': 'movie',
        'tag': '豆瓣高分',
        'page_limit': '10',
        'page_start': (page - 1) * 10,
    })
    full_url = 'https://movie.douban.com/j/search_subjects?' + query
    print(full_url)

    return urllib.request.Request(
        url=full_url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63'
        },
    )


def get_text(request):
    """Send *request* and return the response body decoded as UTF-8.

    Args:
        request: A ``urllib.request.Request`` (or URL string) accepted by
            ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its underlying
    # connection) even if reading or decoding fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def down_load(page, text):
    """Save the response text for one page to ``newMovie<page>.json``.

    The file is created in the current working directory and written as
    UTF-8. An existing file for the same page is overwritten, so running
    the script again does not append a second JSON document to it.

    Args:
        page: Page number used to build the file name.
        text: Response body to store.
    """
    # Save to a local file; 'w' (not 'a') keeps exactly one document per file.
    with open(f'newMovie{page}.json', 'w', encoding='utf-8') as fp:
        fp.write(text)


if __name__ == '__main__':
    # Ask for the inclusive page range (prompts are in Chinese: "enter the
    # start page" / "enter the end page"), then build the request, fetch
    # and save each page in turn.
    first_page = int(input('请输入起始页： '))
    last_page = int(input('请输入结束页： '))
    for page_no in range(first_page, last_page + 1):
        down_load(page_no, get_text(getMovieData(page_no)))
